import datetime
# Print the current local date and time at the top of the output.
print(datetime.datetime.now())
from IPython.display import HTML
# Build an HTML snippet with a jQuery script and a submit button that toggles
# the visibility of every `div.input` element (the notebook's code cells).
# `code_toggle` is also registered on document-ready, so with `code_show`
# starting as true the inputs are hidden on first load.
# The button label is Italian for "Click here to hide / show the code".
# NOTE(review): the HTML object is only rendered if this expression is the
# last statement of a notebook cell; as a plain script statement the value is
# discarded -- confirm how this file is executed.
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Clicca qui per nascondere / mostrare il codice"></form>''')
import pandas as pd
import numpy as np
from datetime import datetime
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
import warnings
warnings.filterwarnings('ignore')
# Remote CSV feeds from the pcm-dpc/COVID-19 GitHub repository:
# regional, provincial and national series respectively.
url_r = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni.csv"
url_p = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-province/dpc-covid19-ita-province.csv"
url_n = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-andamento-nazionale/dpc-covid19-ita-andamento-nazionale.csv"

# Download each feed into its own DataFrame, in the order region, province,
# national (requires network access).
data_region, data_province, data_national = (
    pd.read_csv(feed_url) for feed_url in (url_r, url_p, url_n)
)
# Day-over-day increments of the cumulative national counters
# (new column name, cumulative source column).
for _target, _cumulative in (
    ('new_cases', 'totale_casi'),
    ('new_deaths', 'deceduti'),
    ('new_recovered', 'dimessi_guariti'),
    ('new_swabs', 'tamponi'),
    ('new_unique_tested', 'casi_testati'),
):
    data_national[_target] = data_national[_cumulative].diff()

# Percentage change of each daily figure versus the previous row,
# rounded to two decimals.
for _target, _daily in (
    ('daily_cases_perc_change', 'new_cases'),
    ('daily_swab_perc_change', 'new_swabs'),
    ('daily_unique_tested_perc_change', 'new_unique_tested'),
):
    data_national[_target] = (data_national[_daily].pct_change(1) * 100).round(2)

# Detection ratio: new cases as a percentage of new swabs and of newly
# tested people, rounded to two decimals.
for _target, _denominator in (
    ('detect_ratio_swabs', 'new_swabs'),
    ('detect_ratio_cases', 'new_unique_tested'),
):
    data_national[_target] = (
        data_national['new_cases'] / data_national[_denominator] * 100
    ).round(2)

data_national.tail(10)
# Regional data preparation: one independent DataFrame per region.
def _region_frame(name):
    """Return a copy of the rows of ``data_region`` whose region name is *name*.

    The explicit ``.copy()`` matters because the per-region frames get new
    columns assigned later on; assigning into a filtered slice is the pattern
    pandas reports as ``SettingWithCopyWarning``.
    """
    return data_region[data_region['denominazione_regione'] == name].copy()


data_region_Abruzzo = _region_frame('Abruzzo')
data_region_Basilicata = _region_frame('Basilicata')
data_region_Bolzano = _region_frame('P.A. Bolzano')
data_region_Calabria = _region_frame('Calabria')
data_region_Campania = _region_frame('Campania')
data_region_EmiliaR = _region_frame('Emilia-Romagna')
data_region_Friuli = _region_frame('Friuli Venezia Giulia')
data_region_Lazio = _region_frame('Lazio')
data_region_Liguria = _region_frame('Liguria')
data_region_Lombardia = _region_frame('Lombardia')
data_region_Marche = _region_frame('Marche')
data_region_Molise = _region_frame('Molise')
data_region_Piemonte = _region_frame('Piemonte')
data_region_Puglia = _region_frame('Puglia')
data_region_Sardegna = _region_frame('Sardegna')
data_region_Sicilia = _region_frame('Sicilia')
data_region_Toscana = _region_frame('Toscana')
data_region_Trento = _region_frame('P.A. Trento')
data_region_Umbria = _region_frame('Umbria')
data_region_VAosta = _region_frame("Valle d'Aosta")
data_region_Veneto = _region_frame('Veneto')
def region_apply(region):
    """Add daily-delta, percentage-change and detection-ratio columns in place.

    For every frame the following columns are created (or overwritten):

    * ``new_cases``, ``new_deaths``, ``new_recovered``, ``new_swabs`` --
      row-to-row differences of ``totale_casi``, ``deceduti``,
      ``dimessi_guariti`` and ``tamponi``;
    * ``daily_cases_perc_change``, ``daily_swab_perc_change`` -- percentage
      change of ``new_cases`` / ``new_swabs`` versus the previous row,
      rounded to two decimals;
    * ``detect_ratio`` -- ``new_cases`` as a percentage of ``new_swabs``,
      rounded to two decimals.

    Parameters
    ----------
    region : pandas.DataFrame or iterable of pandas.DataFrame
        The frame(s) to enrich. A bare DataFrame is accepted as a shortcut
        for a one-element list. Rows are assumed to be in chronological
        order, since the differences are taken between consecutive rows.

    Returns
    -------
    None
        The frames are modified in place.
    """
    # Iterating a DataFrame yields column labels, so a single frame has to be
    # wrapped explicitly before looping.
    frames = [region] if isinstance(region, pd.DataFrame) else region
    for x in frames:
        x['new_cases'] = x['totale_casi'].diff()
        x['new_deaths'] = x['deceduti'].diff()
        x['new_recovered'] = x['dimessi_guariti'].diff()
        x['new_swabs'] = x['tamponi'].diff()
        # Day / day-1 percentage change of the daily figures.
        x['daily_cases_perc_change'] = round((x['new_cases'].pct_change(1)) * 100, 2)
        x['daily_swab_perc_change'] = round((x['new_swabs'].pct_change(1)) * 100, 2)
        # Detection ratio: positives per 100 swabs.
        x['detect_ratio'] = round((x['new_cases'] / x['new_swabs']) * 100, 2)
    # Return only after every frame has been processed.
    return None
# Enrich every per-region frame with the daily metrics computed by
# `region_apply`. Umbria is included here: its frame was built alongside the
# others but was previously never passed to `region_apply`, so it lacked the
# derived columns. Each frame is passed in its own one-element list, matching
# the original call convention.
for _region_df in (
    data_region_Abruzzo,
    data_region_Basilicata,
    data_region_Bolzano,
    data_region_Calabria,
    data_region_Campania,
    data_region_EmiliaR,
    data_region_Friuli,
    data_region_Lazio,
    data_region_Liguria,
    data_region_Lombardia,
    data_region_Marche,
    data_region_Molise,
    data_region_Piemonte,
    data_region_Puglia,
    data_region_Sardegna,
    data_region_Sicilia,
    data_region_Toscana,
    data_region_Trento,
    data_region_Umbria,
    data_region_VAosta,
    data_region_Veneto,
):
    region_apply([_region_df])
# Rows of the regional feed grouped into four macro-areas.
data_region_Nordovest = data_region[data_region.denominazione_regione.isin(
    ['Piemonte', 'Lombardia', 'Liguria', "Valle d'Aosta"])]
data_region_Nordest = data_region[data_region.denominazione_regione.isin(
    ['Emilia-Romagna', 'P.A. Bolzano', 'P.A. Trento', 'Veneto', 'Friuli Venezia Giulia'])]
data_region_Centro = data_region[data_region.denominazione_regione.isin(
    ['Toscana', 'Umbria', 'Marche', 'Lazio'])]
data_region_Sudisole = data_region[data_region.denominazione_regione.isin(
    ['Abruzzo', 'Molise', 'Campania', 'Puglia', 'Basilicata', 'Calabria', 'Sicilia', 'Sardegna'])]


def _daily_totals(subset):
    """Sum *subset* per value of ``data`` and add the derived daily metrics.

    ``numeric_only=True`` is passed explicitly: the frame also carries text
    columns, and recent pandas versions no longer drop them silently when
    summing. The grouping key is copied back into a regular ``data`` column
    so the result can be plotted like the other frames.
    """
    totals = subset.groupby('data').sum(numeric_only=True)
    region_apply([totals])
    totals['data'] = totals.index
    return totals


cases_Nordovest = _daily_totals(data_region_Nordovest)
cases_Nordest = _daily_totals(data_region_Nordest)
cases_Centro = _daily_totals(data_region_Centro)
cases_Sudisole = _daily_totals(data_region_Sudisole)
cases_Nordovest.tail(5)
# Options shared by both national bar charts.
_bar_options = {'height': 600, 'color_continuous_scale': 'Sunsetdark'}

# Cumulative confirmed cases per date, coloured by the plotted value.
fig2 = px.bar(
    data_national,
    x='data',
    y='totale_casi',
    hover_data=['totale_casi'],
    color='totale_casi',
    **_bar_options,
)
fig2.update_layout(
    title_text='Total COVID19 Cases - Italy',
    xaxis_rangeslider_visible=True,
)
fig2.update_yaxes(tick0=0, dtick=25000, gridcolor='White')
fig2.show()

# Same chart for the `totale_positivi` column.
fig22 = px.bar(
    data_national,
    x='data',
    y='totale_positivi',
    hover_data=['totale_positivi'],
    color='totale_positivi',
    **_bar_options,
)
fig22.update_layout(
    title_text='Active COVID19 Cases - Italy',
    xaxis_rangeslider_visible=True,
)
fig22.update_yaxes(tick0=0, dtick=10000, gridcolor='White')
fig22.show()
# One (frame, legend label, line colour) triple per plotted series: the four
# macro-areas first, the national series last.
_area_series = (
    (cases_Nordovest, "North-West", 'red'),
    (cases_Nordest, "North-East", 'green'),
    (cases_Centro, "Center", 'darkviolet'),
    (cases_Sudisole, "South and Islands", 'darkblue'),
    (data_national, "All Italy", 'deepskyblue'),
)

# Daily new cases per macro-area and nationally.
fig = go.Figure()
for _frame, _label, _colour in _area_series:
    fig.add_trace(go.Scatter(
        mode="lines+markers",
        x=_frame['data'],
        y=_frame['new_cases'],
        name=_label,
        line_color=_colour,
    ))
fig.update_layout(
    title_text='Daily Coronavirus new cases - All Italy and Regions',
    xaxis_rangeslider_visible=True,
)
fig.show()

# Daily swabs per macro-area and nationally.
fig = go.Figure()
for _frame, _label, _colour in _area_series:
    fig.add_trace(go.Scatter(
        mode="lines+markers",
        x=_frame['data'],
        y=_frame['new_swabs'],
        name=_label,
        line_color=_colour,
    ))
fig.update_layout(
    title_text='Daily swabs - All Italy and Regions',
    xaxis_rangeslider_visible=True,
)
fig.show()
# Daily deaths and recoveries (national).
fig = go.Figure()
fig.add_trace(go.Scatter(mode="lines+markers", x=data_national['data'], y=data_national['new_deaths'],
                         name="Daily Deaths", line_color='red'))
fig.add_trace(go.Scatter(mode="lines+markers", x=data_national['data'], y=data_national['new_recovered'],
                         name="Daily Recovered", line_color='green'))
fig.update_layout(title_text='Daily Coronavirus Deaths and Recoveries - Italy',
                  xaxis_rangeslider_visible=True)
fig.update_yaxes(tick0=0, dtick=500)
fig.show()

# Daily new cases against testing volume (national).
# A fresh figure is created here: without it the traces below were appended
# to the deaths/recoveries figure above, so that chart's series also showed
# up under the "new cases and swabs" title.
fig = go.Figure()
fig.add_trace(go.Scatter(mode="lines+markers", x=data_national['data'], y=data_national['new_cases'],
                         name="Daily Cases", line_color='deepskyblue'))
fig.add_trace(go.Scatter(mode="lines+markers", x=data_national['data'], y=data_national['new_swabs'],
                         name="Daily swabs", line_color='purple'))
fig.add_trace(go.Scatter(mode="lines+markers", x=data_national['data'], y=data_national['new_unique_tested'],
                         name="Daily unique tested", line_color='red'))
fig.update_layout(title_text='Daily Coronavirus new cases and swabs - Italy',
                  xaxis_rangeslider_visible=True)
fig.update_yaxes(tick0=0, dtick=10000)
fig.show()
# Detection ratios: new cases per 100 swabs and per 100 newly tested people.
fig3 = go.Figure()
for _column, _label, _colour in (
    ('detect_ratio_swabs', "Daily detect ratio - Italy", 'purple'),
    ('detect_ratio_cases', "Daily unique detect ratio - Italy", 'red'),
):
    fig3.add_trace(go.Scatter(
        mode="lines+markers",
        x=data_national['data'],
        y=data_national[_column],
        name=_label,
        line_color=_colour,
    ))
fig3.update_layout(
    title_text="Daily Swabs detect ratio - Italy",
    xaxis_rangeslider_visible=True,
)
fig3.update_yaxes(dtick=5)

# `totale_ospedalizzati` series over time.
fig4 = go.Figure()
fig4.add_trace(go.Scatter(
    mode="lines+markers",
    x=data_national['data'],
    y=data_national['totale_ospedalizzati'],
    name="Daily total Hospital - Italy",
    line_color='green',
))
fig4.update_layout(
    title_text="Daily Total Hospital - Italy",
    xaxis_rangeslider_visible=True,
)
fig4.update_yaxes(dtick=2000)

# `terapia_intensiva` series over time.
fig5 = go.Figure()
fig5.add_trace(go.Scatter(
    mode="lines+markers",
    x=data_national['data'],
    y=data_national['terapia_intensiva'],
    name="Daily total UTI - Italy",
    line_color='blue',
))
fig5.update_layout(
    title_text="Daily Total UTI - Italy",
    xaxis_rangeslider_visible=True,
)
fig5.update_yaxes(dtick=200)
# Day-over-day percentage change of new cases and of new swabs (national).
fig6 = go.Figure()
fig6.add_trace(go.Scatter(mode="lines+markers", x=data_national['data'],
                          y=data_national['daily_cases_perc_change'],
                          name="Daily cases percentual change - Italy", line_color='purple'))
fig6.add_trace(go.Scatter(mode="lines+markers", x=data_national['data'],
                          y=data_national['daily_swab_perc_change'],
                          name="Daily swab percentual change - Italy", line_color='red'))
# The title previously read "Daily v- Italy" (garbled); it now follows the
# wording of the equivalent Liguria chart.
fig6.update_layout(title_text="Daily percentual change - Italy",
                   xaxis_rangeslider_visible=True)
fig6.update_yaxes(dtick=40)
# Daily recovered, deaths and new cases on one chart (national).
fig7 = go.Figure()
for _column, _label, _colour in (
    ('new_recovered', "Daily new recovered - Italy", 'purple'),
    ('new_deaths', "Daily new deaths - Italy", 'red'),
    ('new_cases', "Daily new cases - Italy", 'green'),
):
    fig7.add_trace(go.Scatter(
        mode="lines+markers",
        x=data_national['data'],
        y=data_national[_column],
        name=_label,
        line_color=_colour,
    ))
fig7.update_layout(
    title_text="Daily change - Italy",
    xaxis_rangeslider_visible=True,
)
fig7.update_yaxes(dtick=500)

# `terapia_intensiva` against daily deaths (national).
fig8 = go.Figure()
for _column, _label, _colour in (
    ('terapia_intensiva', "Daily total UTI - Italy", 'purple'),
    ('new_deaths', "Daily new deaths - Italy", 'red'),
):
    fig8.add_trace(go.Scatter(
        mode="lines+markers",
        x=data_national['data'],
        y=data_national[_column],
        name=_label,
        line_color=_colour,
    ))
fig8.update_layout(
    title_text="Daily UTI vs Death - Italy",
    xaxis_rangeslider_visible=True,
)
fig8.update_yaxes(dtick=200)
# Rows of the provincial feed whose province code is 'GE'.
data_ge = data_province[data_province['sigla_provincia'] == 'GE']

# `totale_casi` over time for the GE province.
fig9 = go.Figure()
fig9.add_trace(go.Scatter(
    mode="lines+markers",
    x=data_ge['data'],
    y=data_ge['totale_casi'],
    name="Daily cases GE - Italy",
    line_color='red',
))
fig9.update_layout(
    title_text="Daily cases GE - Italy",
    xaxis_rangeslider_visible=True,
)
fig9.update_yaxes(dtick=1000)

# `totale_casi` over time for the Liguria region.
fig10 = go.Figure()
fig10.add_trace(go.Scatter(
    mode="lines+markers",
    x=data_region_Liguria['data'],
    y=data_region_Liguria['totale_casi'],
    name="Daily cases Liguria - Italy",
    line_color='red',
))
fig10.update_layout(
    title_text="Daily cases Liguria - Italy",
    xaxis_rangeslider_visible=True,
)
fig10.update_yaxes(dtick=1000)
# Liguria: intensive care, hospitalised, deaths, new cases and recovered.
fig11 = go.Figure()
for _column, _label, _colour in (
    ('terapia_intensiva', "Daily UTI Liguria - Italy", 'red'),
    ('ricoverati_con_sintomi', "Daily hospital Liguria - Italy", 'purple'),
    ('new_deaths', "Daily new deaths Liguria - Italy", 'green'),
    ('nuovi_positivi', "Daily new cases Liguria - Italy", 'blue'),
    ('new_recovered', "Daily new recovered Liguria - Italy", 'yellow'),
):
    fig11.add_trace(go.Scatter(
        mode="lines+markers",
        x=data_region_Liguria['data'],
        y=data_region_Liguria[_column],
        name=_label,
        line_color=_colour,
    ))
fig11.update_layout(
    title_text="Daily change Liguria - Italy",
    xaxis_rangeslider_visible=True,
)
fig11.update_yaxes(dtick=100)

# Liguria: new cases per 100 swabs.
fig12 = go.Figure()
fig12.add_trace(go.Scatter(
    mode="lines+markers",
    x=data_region_Liguria['data'],
    y=data_region_Liguria['detect_ratio'],
    name="Daily detect ratio Liguria - Italy",
    line_color='green',
))
fig12.update_layout(
    title_text="Daily detect ratio Liguria - Italy",
    xaxis_rangeslider_visible=True,
)
fig12.update_yaxes(dtick=20)

# Liguria: day-over-day percentage change of new cases and new swabs.
fig13 = go.Figure()
for _column, _label, _colour in (
    ('daily_cases_perc_change', "Daily cases perc change Liguria - Italy", 'red'),
    ('daily_swab_perc_change', "Daily swab perc change Liguria - Italy", 'purple'),
):
    fig13.add_trace(go.Scatter(
        mode="lines+markers",
        x=data_region_Liguria['data'],
        y=data_region_Liguria[_column],
        name=_label,
        line_color=_colour,
    ))
fig13.update_layout(
    title_text="Daily percentual change Liguria - Italy",
    xaxis_rangeslider_visible=True,
)
fig13.update_yaxes(dtick=100)
#print(data_national.dtypes)
import pandas as pd
import numpy as np
import itertools
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, acf, pacf,arma_order_select_ic
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_model import ARIMA
import warnings
#Librerie di base
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import pyplot
from matplotlib.pyplot import figure
import plotly.tools as tls
import math
import statistics as st
import seaborn as sns
from io import StringIO
import plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import pylab as pl
import scipy.stats as scs
from itertools import product # some useful functions
from tqdm import tqdm_notebook
import time
import timeit
import pytest
import os
import pyarrow
#Pacchetto Sklearn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle
from sklearn.svm import SVC
#Per Modello XGBoost
import xgboost as xgb
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import category_encoders as ce
warnings.simplefilter('ignore')
# Two-column frame (date, daily new cases) feeding the ARIMA model; rows with
# a missing value (the first row, where no difference exists) are dropped.
# The former `ds.set_index('data')` call was removed: its result was never
# assigned, so it had no effect and the model has always been fitted on the
# default integer index.
ds = (
    data_national[['data', 'new_cases']]
    .rename(columns={'new_cases': 'new'})
    .dropna()
)

# NOTE(review): `fit(disp=0)` and indexing the forecast result with `[0]`
# match the legacy `statsmodels.tsa.arima_model.ARIMA` interface imported by
# this file -- confirm the installed statsmodels version still provides it.
model = ARIMA(ds['new'], order=(1, 1, 4))
model_fit = model.fit(disp=0)
print(model_fit.summary())

# Residual diagnostics: line plot, density plot and summary statistics.
residuals = pd.DataFrame(model_fit.resid)
residuals.plot()
pyplot.show()
residuals.plot(kind='kde')
pyplot.show()
print(residuals.describe())

# Forecast for the next 10 steps.
forecast = model_fit.forecast(steps=10)[0]
print(forecast)
# Walk-forward validation: hold out the last 0.1% of the series, refit the
# model before each held-out point and predict one step ahead.
X = ds['new'].values
size = int(len(X) * 0.999)
train, test = X[:size], X[size:]
history = list(train)
predictions = []
# Number of observations the most recent `model_fit` was trained on; needed
# to place the final forecast at the right x position in the plot below.
fitted_len = len(X)
for obs in test:
    fitted_len = len(history)
    model = ARIMA(history, order=(1, 1, 4))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat)
    # The true value becomes part of the training history for the next step.
    history.append(obs)
    print('predicted=%f, expected=%f' % (yhat, obs))
error = mean_squared_error(test, predictions)
print('Test MSE: %.3f' % error)

# Two-step forecast from the last fitted model, drawn right after the data
# that model was fitted on. Previously the forecast was plotted without x
# values and therefore appeared at positions 0 and 1, at the start of the
# series.
forecast = model_fit.forecast(steps=2)[0]
print(forecast)
pyplot.plot(X)
pyplot.plot(range(fitted_len, fitted_len + len(forecast)), forecast, color='red')
pyplot.show()
'''
import warnings
from pandas import read_csv
from pandas import datetime
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
# evaluate an ARIMA model for a given order (p,d,q)
def evaluate_arima_model(X, arima_order):
# prepare training dataset
train_size = int(len(X) * 0.99)
train, test = X[0:train_size], X[train_size:]
history = [x for x in train]
# make predictions
predictions = list()
for t in range(len(test)):
model = ARIMA(history, order=arima_order)
model_fit = model.fit(disp=0)
yhat = model_fit.forecast()[0]
predictions.append(yhat)
history.append(test[t])
# calculate out of sample error
error = mean_squared_error(test, predictions)
return error
# evaluate combinations of p, d and q values for an ARIMA model
def evaluate_models(dataset, p_values, d_values, q_values):
#dataset = dataset.astype('float32')
best_score, best_cfg = float("inf"), None
for p in p_values:
for d in d_values:
for q in q_values:
order = (p,d,q)
try:
mse = evaluate_arima_model(dataset, order)
if mse < best_score:
best_score, best_cfg = mse, order
print('ARIMA%s MSE=%.3f' % (order,mse))
except:
continue
print('Best ARIMA%s MSE=%.3f' % (best_cfg, best_score))
# load dataset
# evaluate parameters
p_values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
d_values = range(0, 10)
q_values = range(0, 10)
#warnings.filterwarnings("ignore")
evaluate_models(ds['new'].values, p_values, d_values, q_values)
'''
# Result reported by the disabled grid search above: best order ARIMA(1, 1, 4), MSE=154507.826
import pandas as pd
import statsmodels.api as sm
from statsmodels.tsa.api import VAR
# Input for the VAR model: the national frame without the text columns, the
# percentage/ratio columns and the date column.
# The former `dat.set_index('data')` call was removed: its result was never
# assigned, and the `data` column is dropped here anyway.
# NOTE(review): missing values are replaced by the sentinel 999999999, which
# the model treats as real observations -- confirm this is intended rather
# than dropping or interpolating the affected rows.
dat = data_national.drop(
    ['note', 'stato', 'variazione_totale_positivi', 'daily_cases_perc_change',
     'daily_swab_perc_change', 'daily_unique_tested_perc_change',
     'detect_ratio_swabs', 'detect_ratio_cases', 'data'],
    axis=1,
).fillna(999999999)
dat.dtypes

# Fit the VAR model with its default settings and forecast 5 steps ahead.
model = VAR(dat)
model_fit = model.fit()
pred = model_fit.forecast(model_fit.y, steps=5)
print(pred)

# Plot a 10-step forecast. The previous `plt.figure(figsize=(300, 300))` was
# removed: `plot_forecast` draws on a figure it creates itself, so that call
# only allocated an empty 300x300-inch canvas.
model_fit.plot_forecast(10)